library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(gtools)
library(vegan)
library(iNEXT)
library(fossil)
library(ggrepel)

1) 18S amplicons

1.1) Data overview

Let’s read the dataset and remove the samples containing fewer than 49975 reads:

## [1] 46808   132
##     OTUId st055_MD1013 st056_MD1035 st057_MD1042 st058_MD1080
## 1   OTU_2         9289         2293        16285         5140
## 2 OTU_106          738           25         1497         3309
## 3 OTU_403           81            3          238            6
## 4 OTU_139          106           23          211           58
## 5   OTU_4          230           78          776          150
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated

## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated

## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated

## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated

## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated

## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated

## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## [1] 46808   131
##         st055_MD1013 st056_MD1035 st057_MD1042 st058_MD1080 st059_MD1100
## OTU_2           9289         2293        16285         5140         1187
## OTU_106          738           25         1497         3309         1808
## OTU_403           81            3          238            6           37
## OTU_139          106           23          211           58           82
## OTU_4            230           78          776          150          561
## [1] 46690   129
## [1] 46690     7
##                                                                                                                                             SILVA_classif
## OTU_2                                FJ832119.1.1585_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_I;uncultured_marine_picoplankton
## OTU_106                                        AY665056.1.1740_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_I;uncultured_eukaryote
## OTU_403                               KC488491.1.1699_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_III;uncultured_marine_alveolate
## OTU_139                                       EF172945.1.1672_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_II;uncultured_eukaryote
## OTU_4   KC488405.1.1558_Eukaryota;SAR;Alveolata;Dinoflagellata;Dinophyceae;Gymnodiniphycidae;Gymnodinium_clade;Erythropsidinium;uncultured_dinoflagellate
##         SILVA_consensus                    MAS_classif MAS_consensus
## OTU_2              <NA>      EU793773_MALV-I_Alveolata          <NA>
## OTU_106            <NA>      EU818437_MALV-I_Alveolata          <NA>
## OTU_403            <NA>    EU793615_MALV-III_Alveolata          <NA>
## OTU_139            <NA>     EU818480_MALV-II_Alveolata          <NA>
## OTU_4       Dinophyceae EU780636_Dinophyceae_Alveolata   Dinophyceae
##                            BM_classif BM_consensus
## OTU_2       3261_MALV-I_Alveolata_385         <NA>
## OTU_106      90_MALV-I_Alveolata_1447         <NA>
## OTU_403    38_MALV-III_Alveolata_6570         <NA>
## OTU_139                          <NA>         <NA>
## OTU_4   21474_Dinophyceae_Alveolata_6  Dinophyceae
##         SILVA_plus_MAS_plus_BM_classif
## OTU_2                             <NA>
## OTU_106                           <NA>
## OTU_403                           <NA>
## OTU_139                           <NA>
## OTU_4                      Dinophyceae
## [1] 46690   122
##         st055_MD1013 st056_MD1035 st057_MD1042 st058_MD1080 st059_MD1100
## OTU_2           9289         2293        16285         5140         1187
## OTU_106          738           25         1497         3309         1808
## OTU_403           81            3          238            6           37
## OTU_139          106           23          211           58           82
## OTU_4            230           78          776          150          561
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4050   59560   98630  126100  157000  935800
## st056_MD1035 st063_MD1176 st064_MD1186 st067_MD1246 st083_MD1517 
##        21169        44047        48691        10358        11740 
##  st013_MD202 st137_MD2695  st029_MD506  st030_MD528   st004_MD60 
##        41299        35895         8514        21381        36838 
##  st035_MD621  st037_MD646  st046_MD834  st051_MD916  st053_MD962 
##        45669        17136        40589        38674        20969 
##  st054_MD985   st007_MD98 
##         4050        45207
## [1] 46690   105
## [1] 46690    92
## [1] 43966    92

Table dimensions and content outline:

## [1] 46690    92
##         st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_2           9289        16285         5140         1187        8207
## OTU_106          738         1497         3309         1808          30
## OTU_403           81          238            6           37           0
## OTU_139          106          211           58           82         214
## OTU_4            230          776          150          561         337

Minimum number of reads per station:

# Smallest per-station read total (each column of the table is one station).
tb18_tax_occur_min49975 %>%
  colSums() %>%
  min()
## [1] 49975

Maximum number of reads per station:

# Largest per-station read total (each column of the table is one station).
tb18_tax_occur_min49975 %>%
  colSums() %>%
  max()
## [1] 935755

Identification of stations with higher number of reads:

# Total number of reads in every station (column sums of the OTU table).
amplicons_per_sample <- colSums(tb18_tax_occur_min49975)
# Stations holding more than 300000 reads; the totals computed above are
# reused instead of summing the table a second time.
amplicons_per_sample[which(amplicons_per_sample > 300000)]
## st057_MD1042 st112_MD2051 st131_MD2516  st039_MD684 
##       308992       387935       935755       328173

Overall reads per sample:

1.2) Normalization

Let’s normalize the original dataset by randomly subsampling 49975 reads in each station:

# rrarefy() subsamples at random, so fix the RNG seed first: without it every
# run yields a slightly different rarefied table (and different OTU counts
# downstream), which makes the report impossible to reproduce.
set.seed(1234)
# rrarefy() works on rows, so transpose the table to stations x OTUs.
tb18_tax_occur_min49975_t <- t(tb18_tax_occur_min49975)
# Randomly subsample every station down to 49975 reads.
tb18_tax_occur_ss49975 <- rrarefy(tb18_tax_occur_min49975_t, sample = 49975)

The normalized table shows the following dimensions and format:

## [1]    92 46690
##              OTU_2 OTU_106 OTU_403 OTU_139 OTU_4
## st055_MD1013  8976     706      76     104   222
## st057_MD1042  2644     244      34      34   132
## st058_MD1080  1295     855       3      16    28
## st059_MD1100   464     689      14      35   205
## st009_MD111   2131       5       0      64    93

Its content fits with the expected normalization values (49975 reads per station):

# Reads per station after rarefaction (rows are stations); every value is
# expected to equal the subsampling depth.
tb18_tax_occur_ss49975 %>%
  rowSums()
##  st055_MD1013  st057_MD1042  st058_MD1080  st059_MD1100   st009_MD111 
##         49975         49975         49975         49975         49975 
##  st060_MD1125  st062_MD1143  st065_MD1209  st066_MD1222 st068b_MD1260 
##         49975         49975         49975         49975         49975 
##  st069_MD1293  st070_MD1302  st072_MD1331  st073_MD1354  st074_MD1368 
##         49975         49975         49975         49975         49975 
##  st075_MD1398   st010_MD141  st076_MD1421  st077_MD1425  st078_MD1459 
##         49975         49975         49975         49975         49975 
##  st079_MD1475  st081_MD1479  st082_MD1490  st085_MD1537  st086_MD1559 
##         49975         49975         49975         49975         49975 
##  st087_MD1589  st088_MD1607  st089_MD1629  st092_MD1672  st093_MD1700 
##         49975         49975         49975         49975         49975 
##  st094_MD1724  st095_MD1744  st096_MD1772  st097_MD1798  st098_MD1811 
##         49975         49975         49975         49975         49975 
##  st101_MD1857  st102_MD1885  st103_MD1887  st104_MD1928  st106_MD1956 
##         49975         49975         49975         49975         49975 
##  st107_MD1964   st012_MD196  st108_MD2004  st109_MD2008  st110_MD2047 
##         49975         49975         49975         49975         49975 
##  st112_MD2051  st114_MD2094  st115_MD2111   st014_MD214  st118_MD2155 
##         49975         49975         49975         49975         49975 
##  st120_MD2243  st126_MD2380  st127_MD2408  st128_MD2436  st129_MD2448 
##         49975         49975         49975         49975         49975 
##  st131_MD2516   st015_MD254  st132_MD2562  st133_MD2594  st134_MD2624 
##         49975         49975         49975         49975         49975 
##   st017_MD266  st138_MD2723  st140_MD2761   st018_MD285  st143_MD2864 
##         49975         49975         49975         49975         49975 
##    st001_MD28  st145_MD2956   st019_MD311   st022_MD346   st023_MD366 
##         49975         49975         49975         49975         49975 
##   st024_MD389    st002_MD40   st025_MD417   st026_MD439   st027_MD458 
##         49975         49975         49975         49975         49975 
##   st028_MD486    st003_MD52   st032_MD550   st033_MD575   st034_MD591 
##         49975         49975         49975         49975         49975 
##    st005_MD64   st038_MD664   st039_MD684   st040_MD712   st041_MD734 
##         49975         49975         49975         49975         49975 
##   st043_MD753   st044_MD778   st045_MD806   st047_MD854   st049_MD876 
##         49975         49975         49975         49975         49975 
##   st050_MD897   st052_MD940 
##         49975         49975

Let’s check out how many OTUs don’t appear in the new table:

# Number of OTUs (columns) left without a single read after rarefaction.
# The exact count changes between runs because the subsampling is random.
sum(colSums(tb18_tax_occur_ss49975) == 0, na.rm = TRUE)
## [1] 8061

There are 8061 OTUs that don’t show any occurrence in the normalized data (the exact number changes slightly between runs because the subsampling is random). Let’s remove them from the table and take a look at its final dimensions:

# Remove the OTUs (columns) that received no reads in the rarefied table.
# Columns are selected by the indices to KEEP rather than with
# `x[, -which(...)]`: when no column sums to zero, `-which(...)` becomes
# `-integer(0)`, which selects zero columns and would silently empty the table.
zero_otus <- which(colSums(tb18_tax_occur_ss49975) == 0)
keep_otus <- setdiff(seq_len(ncol(tb18_tax_occur_ss49975)), zero_otus)
tb18_tax_occur_ss49975_no_cero <-
  tb18_tax_occur_ss49975[, keep_otus, drop = FALSE]

# Reorder the stations (rows) by name using gtools::mixedorder(), which sorts
# strings with embedded numbers in natural order.
station_order <- mixedorder(row.names(tb18_tax_occur_ss49975_no_cero))
tb18_tax_occur_ss49975_no_cero <-
  tb18_tax_occur_ss49975_no_cero[station_order, , drop = FALSE]

# Stations x remaining OTUs; the OTU count varies between runs because the
# rarefaction is random.
dim(tb18_tax_occur_ss49975_no_cero)
## [1]    92 38629

Datasets summary:

# Dimensions of the three main tables produced so far.
dim(tb18_tax) #46690   129
## [1] 46690   129
dim(tb18_tax_occur) #46690   122
## [1] 46690   122
# Rarefied table: 92 stations x OTUs kept after removing empty columns
# (38629 in this run; the count varies between runs of the random subsampling).
dim(tb18_tax_occur_ss49975_no_cero)
## [1]    92 38629

1.3) General community analysis

1.3.1) Richness and evenness (Shannon index)

Most of the samples take Shannon Index values around 6:

1.3.2) Richness: OTU number

Lowest number of OTUs per sample:

## [1] 1256

Maximum number of OTUs per sample:

## [1] 5182

In most of the samples, we can identify about 4000 OTUs:

# Per-sample OTU counts sorted in increasing order, one filled point per sample.
plot(sort(OTUs_per_sample_18S_tax_occur_ss49975), pch=19)

# Distribution of the same per-sample OTU counts; pch=19 draws outliers as
# filled points.
boxplot(OTUs_per_sample_18S_tax_occur_ss49975, pch=19)

1.3.3) Index of evenness

1.3.3.1) Pielou’s index

# Pielou's evenness J' = H' / ln(S): the per-sample diversity value divided by
# the natural log of the per-sample OTU count.
# NOTE(review): tb18_tax_occur_ss49975_div is presumably the Shannon index
# from section 1.3.1 - its computation is not shown here; confirm.
pielou_evenness_18S_tax_occur_ss49975 <-
  tb18_tax_occur_ss49975_div / log(OTUs_per_sample_18S_tax_occur_ss49975)

Pielou’s index (J’, constrained between 0 and 1) takes values closer to 1 the more evenly the reads are distributed among the OTUs of a sample. Most of the samples get values between 0.7 and 0.8, meaning that OTU abundances are fairly even within each sample - no single OTU strongly dominates the community.

Conversely, the more the abundances differ between species (for instance, when a dominant species is present), the lower J’ is.

# Per-sample Pielou evenness values sorted in increasing order, one filled
# point per sample.
plot(sort(pielou_evenness_18S_tax_occur_ss49975), pch=19)

# Distribution of the same evenness values; pch=19 draws outliers as filled
# points.
boxplot(pielou_evenness_18S_tax_occur_ss49975, pch=19)

OTU_6, with 169592 reads, is the most abundant in the overall dataset:

# Ten most abundant OTUs: total reads per OTU across all stations, sorted from
# largest to smallest. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
head(
  sort(colSums(tb18_tax_occur_ss49975_no_cero), decreasing = TRUE),
  n = 10L
)
##  OTU_6  OTU_2 OTU_23  OTU_8 OTU_25 OTU_27  OTU_1 OTU_12  OTU_7  OTU_9 
## 169592 161123  56523  51607  48699  45926  40334  39512  38398  35406

Most of the OTUs show very few occurrences, suggesting that we will probably be able to identify a significant amount of rare OTUs:

# Rank-abundance plot: natural log of the total reads of every OTU, ordered
# from the most to the least abundant. `TRUE` is spelled out because `T` can
# be reassigned, and the axes are labelled explicitly instead of showing the
# deparsed call.
plot(
  log(sort(colSums(tb18_tax_occur_ss49975_no_cero), decreasing = TRUE)),
  pch = 19,
  xlab = "OTU rank",
  ylab = "log(total reads)"
)

1.3.4) Abundance Models

1.3.4.1) Rank-Abundance or Dominance/Diversity Model (“radfit”)

The OTU abundance distribution fits relatively closely to the log-normal model.

1.3.4.2) Preston’s Lognormal Model

According to Preston’s lognormal model fitted to species frequency groups, we’re missing ~2600 species:

# Fit Preston's lognormal model to the total reads per OTU (column sums).
# NOTE(review): the input is tb18_tax_occur_min49975_t, i.e. the table before
# rrarefy(), although the result name says "ss49975" - confirm which table was
# intended.
tb18_tax_occur_ss49975_prestonfit<-prestonfit(colSums(tb18_tax_occur_min49975_t))
plot(tb18_tax_occur_ss49975_prestonfit, main="Pooled species")

# Extrapolated, observed and veiled species numbers derived from the fit.
veiledspec(tb18_tax_occur_ss49975_prestonfit)
## Extrapolated     Observed       Veiled 
##    46565.639    43966.000     2599.639

When computing Preston’s lognormal model fit without pooling data into groups, we seem to miss ~2457 species:

## Extrapolated     Observed       Veiled 
##    46422.875    43966.000     2456.875

1.3.5) Rarefaction curves of rarefied and non-rarefied datasets

# Pool the rarefied community matrix into a single one-row "sample" holding
# the total reads of every OTU across all stations.
rarec_input <- t(as.matrix(colSums(tb18_tax_occur_ss49975_no_cero)))

# Build the title from the data. The OTU and read counts were previously
# hard-coded and no longer matched the table being plotted (92 stations x
# 49975 reads), and the OTU count changes with every random rarefaction.
rarec_title <- sprintf(
  "18S amplicons diversity step=1000 & ss=40000\n(%s OTUs; %s reads)\n",
  format(sum(rarec_input > 0), big.mark = ",", scientific = FALSE),
  format(sum(rarec_input), big.mark = ",", scientific = FALSE)
)

# Rarefaction curve computed every 1000 reads; `sample = 40000` marks the
# reference sample size on the plot.
tb18_rarecurve_step1000_40000 <- rarecurve(
  rarec_input,
  step = 1000,
  sample = 40000,
  xlab = "Sample size",
  ylab = "OTUs",
  label = TRUE,
  main = rarec_title
)

# Pool the non-rarefied table into a single one-row "sample": total reads of
# every OTU (rows of tb18_tax_occur) across all stations.
rarec_allOTUs_input <- t(as.matrix(rowSums(tb18_tax_occur)))

# Reference sample size passed to rarecurve().
# NOTE(review): 46690 equals the number of OTUs in the table and the old title
# claimed ss=100000 - confirm which sample size was intended. The value used
# by the original call is kept, and the title now reports it truthfully.
rarec_allOTUs_sample <- 46690

# Build the title from the values actually used instead of hard-coding them.
rarec_allOTUs_title <- sprintf(
  "18S amplicons diversity non-rarefied step=1000 & ss=%s\n(%s OTUs; %s reads)\n",
  format(rarec_allOTUs_sample, scientific = FALSE),
  format(sum(rarec_allOTUs_input > 0), big.mark = ",", scientific = FALSE),
  format(sum(rarec_allOTUs_input), big.mark = ",", scientific = FALSE)
)

tb18_rarecurve_allOTUs_step1000_46690 <- rarecurve(
  rarec_allOTUs_input,
  step = 1000,
  sample = rarec_allOTUs_sample,
  xlab = "Sample size",
  ylab = "OTUs",
  label = TRUE,
  main = rarec_allOTUs_title
)

1.3.6) Beta diversity

1.3.6.1) Dissimilarity matrix using Bray-Curtis index:

The Bray-Curtis dissimilarity, constrained between 0 (identical composition) and 1 (highest dissimilarity), allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the sample pairs take dissimilarity values between 0.65 and 0.75, meaning that their composition is substantially different.

1.3.6.2) Hierarchical clustering

The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups.

(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)

1.3.6.3) Non-metric multidimensional scaling

We can identify a prominent group in the central part of the NMDS plot and a few outliers in the central-high edge of the plot. The stress parameter takes a value of about 0.21, slightly above the commonly used 0.2 threshold, so the plot should be interpreted with some caution.

## 
## Call:
## monoMDS(dist = tb18_tax_occur_ss49975_no_cero.bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 92 points, dissimilarity 'bray', call 'vegdist(x = tb18_tax_occur_ss49975_no_cero, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.2113352 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 154 iterations: Stress nearly unchanged (ratio > sratmax)

When implementing a more robust function for computing NMDS plots, the result is quite similar:

## Run 0 stress 0.2048976 
## Run 1 stress 0.2130134 
## Run 2 stress 0.2043258 
## ... New best solution
## ... Procrustes: rmse 0.0169961  max resid 0.1571898 
## Run 3 stress 0.2049018 
## Run 4 stress 0.4113734 
## Run 5 stress 0.2043288 
## ... Procrustes: rmse 0.0004151054  max resid 0.00329998 
## ... Similar to previous best
## Run 6 stress 0.2149611 
## Run 7 stress 0.2049333 
## Run 8 stress 0.2063113 
## Run 9 stress 0.2049063 
## Run 10 stress 0.2049423 
## Run 11 stress 0.2055204 
## Run 12 stress 0.2048942 
## Run 13 stress 0.2063229 
## Run 14 stress 0.2194009 
## Run 15 stress 0.2063571 
## Run 16 stress 0.2048972 
## Run 17 stress 0.2055217 
## Run 18 stress 0.2048899 
## Run 19 stress 0.2134129 
## Run 20 stress 0.2055332 
## *** Solution reached
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available

1.4) Geographical analysis

## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used

Working datasets:

  1. Community matrix: tb18_tax_occur_ss49975_no_cero
# Shape of the rarefied community matrix (stations x OTUs).
dim(tb18_tax_occur_ss49975_no_cero)
## [1]    92 38629
# Peek at its top-left 5 x 5 corner.
tb18_tax_occur_ss49975_no_cero[seq_len(5), seq_len(5)]
##             OTU_2 OTU_106 OTU_403 OTU_139 OTU_4
## st001_MD28    131      44       0       5    53
## st002_MD40    268       8       9       5    44
## st003_MD52    736      23      41      15    34
## st005_MD64    854     103      10       0   126
## st009_MD111  2131       5       0      64    93
  1. Community Bray-Curtis: tb18_tax_occur_ss49975_no_cero.bray
# Shape of the station-by-station Bray-Curtis dissimilarity table.
dim(tb18_tax_occur_ss49975_no_cero.bray)
## [1] 92 92
  1. Stations distances in km: geo_distances_MP_18S
# Shape of the station-by-station geographic distance table (km).
dim(geo_distances_MP_18S)
## [1] 92 92

Communities quickly change their composition across geographical distances:

# Scatter of community dissimilarity against geographic distance for the
# station pairs (x-axis label typo "Geopgraphical" fixed).
plot(
  geo_distances_MP_18S,
  tb18_tax_occur_ss49975_no_cero.bray,
  pch = 19,
  cex = 0.4,
  xlab = "Geographical distances",
  ylab = "Bray-Curtis dissimilarities"
)

1.4.1) Mantel correlograms

The Mantel statistic is significant (p = 0.001) but low (r = 0.17), meaning that the correlation between sample dissimilarity and geographical distance is weak.

# Mantel test of the geographic distances against the Bray-Curtis
# dissimilarities between stations.
mantel(
  xdis = geo_distances_MP_18S,
  ydis = tb18_tax_occur_ss49975_no_cero.bray
)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = geo_distances_MP_18S, ydis = tb18_tax_occur_ss49975_no_cero.bray) 
## 
## Mantel statistic r: 0.1731 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0218 0.0293 0.0358 0.0425 
## Permutation: free
## Number of permutations: 999

Maximum distance between samples:

## [1] 19500.19

Minimum distance between samples:

## [1] 0

Correlograms:

# Mantel correlogram of community dissimilarity over geographic distance,
# using 1000 km wide distance classes from 0 to 20000 km.
MP_18s_ss49975_mantel_correl_by_1000km <- mantel.correlog(
  D.eco = tb18_tax_occur_ss49975_no_cero.bray,
  D.geo = geo_distances_MP_18S,
  break.pts = seq(0, 20000, by = 1000)
)
plot(MP_18s_ss49975_mantel_correl_by_1000km)

# Same correlogram at a finer resolution: 100 km wide distance classes.
MP_18s_ss49975_mantel_correl_by_100km <- mantel.correlog(
  D.eco = tb18_tax_occur_ss49975_no_cero.bray,
  D.geo = geo_distances_MP_18S,
  break.pts = seq(0, 20000, by = 100)
)
plot(MP_18s_ss49975_mantel_correl_by_100km)

1.5) Abundance vs. occurrence

In the following plot, we can see the distribution of OTUs according to their percentage of occurrence and relative abundance. The red line marks the OTUs that occur in more than 80% of the samples, the green line delimits regionally rare OTUs (< 0.001%), and the blue one delimits regionally abundant OTUs (> 0.1%).

Regionally abundant OTUs (relative abundance over 0.1%):

# Copy the row names of the abundant-OTU table into an explicit first column
# called `otu_names`, keeping all existing columns after it.
tb18_ss49975_abundant_sorted_prov <- cbind(
  otu_names = row.names(tb18_ss49975_abundant_sorted),
  tb18_ss49975_abundant_sorted
)
##     otu_names mean_rabund perc_occur SILVA_plus_MAS_plus_BM_classif
## 1       OTU_1 0.008772647  100.00000                    Dinophyceae
## 2      OTU_10 0.004647541   91.30435                           <NA>
## 3     OTU_101 0.002379451   86.95652                           <NA>
## 4     OTU_102 0.001934880   80.43478                           <NA>
## 5   OTU_10246 0.001144268   76.08696                           <NA>
## 6     OTU_103 0.001510103   90.21739                           <NA>
## 7     OTU_104 0.001518803   80.43478                           <NA>
## 8     OTU_105 0.001420493   46.73913                           <NA>
## 9     OTU_106 0.003904561   88.04348                           <NA>
## 10    OTU_108 0.001519890   71.73913                           <NA>
## 11    OTU_109 0.001773060   64.13043                           <NA>
## 12     OTU_11 0.007058094  100.00000                           <NA>
## 13    OTU_110 0.001633208   84.78261                    Dinophyceae
## 14    OTU_111 0.001425278   94.56522                    Dinophyceae
## 15    OTU_112 0.001392653   94.56522                    Dinophyceae
## 16    OTU_113 0.001118820   76.08696                           <NA>
## 17   OTU_1136 0.001289775   98.91304                    Dinophyceae
## 18  OTU_11454 0.001849838  100.00000                           <NA>
## 19    OTU_115 0.001266068   47.82609                           <NA>
## 20    OTU_116 0.001787633   83.69565                           <NA>
## 21    OTU_117 0.001023338   91.30435                           <NA>
## 22    OTU_118 0.001006373   70.65217                           <NA>
## 23  OTU_11833 0.001151010   84.78261                           <NA>
## 24    OTU_119 0.002767906   28.26087                           <NA>
## 25     OTU_12 0.008593862  100.00000                           <NA>
## 26    OTU_120 0.001078365   68.47826                           <NA>
## 27    OTU_121 0.001687148   98.91304                    Dinophyceae
## 28    OTU_122 0.001221698   66.30435                           <NA>
## 29    OTU_123 0.001025513   73.91304                           <NA>
## 30    OTU_124 0.001649738   82.60870                           <NA>
## 31    OTU_125 0.001232355   96.73913                           <NA>
## 32    OTU_126 0.001682580   96.73913                           <NA>
## 33    OTU_128 0.001146443   84.78261                           <NA>
## 34    OTU_129 0.001082498   66.30435                  Chrysophyceae
## 35     OTU_13 0.007238402   97.82609                           <NA>
## 36    OTU_130 0.001074668  100.00000                           <NA>
## 37    OTU_131 0.001257368   96.73913               Dictyochophyceae
## 38    OTU_132 0.001183853   61.95652                           <NA>
## 39    OTU_133 0.001000935   57.60870                           <NA>
## 40    OTU_135 0.001419623   89.13043                  Chrysophyceae
## 41    OTU_136 0.001146660   52.17391                           <NA>
## 42    OTU_137 0.002185441   61.95652                           <NA>
## 43     OTU_14 0.003708158   92.39130                           <NA>
## 44    OTU_140 0.001117950   79.34783                           <NA>
## 45    OTU_141 0.001080540   66.30435                           <NA>
## 46    OTU_142 0.001206255   72.82609        Prasinophyceae_clade-IX
## 47    OTU_144 0.001044218   92.39130                           <NA>
## 48    OTU_145 0.001232355   86.95652                           <NA>
## 49    OTU_146 0.001199078   66.30435                           <NA>
## 50  OTU_14696 0.006883659   96.73913                           <NA>
## 51     OTU_15 0.003846488   55.43478                           <NA>
## 52    OTU_151 0.001046393   65.21739                  Chrysophyceae
## 53    OTU_157 0.001019205   44.56522                           <NA>
## 54     OTU_16 0.002011223   36.95652       Prasinophyceae_clade-VII
## 55    OTU_161 0.001170803   54.34783                           <NA>
## 56    OTU_162 0.001259543   76.08696                           <NA>
## 57    OTU_163 0.001031168   78.26087                           <NA>
## 58    OTU_165 0.001167975   80.43478                           <NA>
## 59     OTU_17 0.004036366   94.56522                           <NA>
## 60    OTU_174 0.001095765   73.91304                           <NA>
## 61    OTU_177 0.001219740   92.39130                           <NA>
## 62    OTU_179 0.001121865   71.73913                           <NA>
## 63     OTU_18 0.006814059  100.00000                    Dinophyceae
## 64    OTU_180 0.001238880   69.56522                           <NA>
## 65   OTU_1819 0.001168410   69.56522                           <NA>
## 66   OTU_1842 0.001046610   95.65217                    Dinophyceae
## 67    OTU_188 0.001339365   95.65217                    Dinophyceae
## 68   OTU_1882 0.001395480   69.56522                           <NA>
## 69     OTU_19 0.004412859  100.00000                           <NA>
## 70    OTU_192 0.001192118   95.65217                           <NA>
## 71      OTU_2 0.035044261  100.00000                           <NA>
## 72     OTU_20 0.007376297   54.34783                  Pelagophyceae
## 73     OTU_21 0.005322009   94.56522                           <NA>
## 74     OTU_22 0.004715619   91.30435                           <NA>
## 75    OTU_220 0.001024643   95.65217                           <NA>
## 76     OTU_23 0.012293756   96.73913                           <NA>
## 77    OTU_235 0.001173630   84.78261                           <NA>
## 78     OTU_24 0.006683777   98.91304                           <NA>
## 79    OTU_243 0.001317833   97.82609                           <NA>
## 80     OTU_25 0.010592035  100.00000                    Dinophyceae
## 81     OTU_26 0.005064271   96.73913                           <NA>
## 82     OTU_27 0.009988907  100.00000                           <NA>
## 83     OTU_28 0.004075081   95.65217                           <NA>
## 84     OTU_29 0.002512126   98.91304                           <NA>
## 85      OTU_3 0.005866629   85.86957                           <NA>
## 86     OTU_30 0.003419971   72.82609                           <NA>
## 87     OTU_31 0.003338843   95.65217                           <NA>
## 88     OTU_32 0.002127803   77.17391                           <NA>
## 89     OTU_33 0.003103291   98.91304                    Dinophyceae
## 90    OTU_338 0.001162973   95.65217                    Dinophyceae
## 91     OTU_34 0.005778106   71.73913                  Chrysophyceae
## 92     OTU_35 0.002468843   86.95652                           <NA>
## 93  OTU_35494 0.001208213   68.47826                           <NA>
## 94  OTU_35799 0.002926463  100.00000                    Dinophyceae
## 95     OTU_36 0.003820388   89.13043                           <NA>
## 96     OTU_37 0.002746808   95.65217                           <NA>
## 97     OTU_38 0.003358636   92.39130                           <NA>
## 98     OTU_39 0.003165496   98.91304                    Dinophyceae
## 99   OTU_3988 0.001437240   80.43478                           <NA>
## 100     OTU_4 0.007469604  100.00000                    Dinophyceae
## 101    OTU_40 0.004207104  100.00000                    Dinophyceae
## 102    OTU_41 0.002774866   93.47826                           <NA>
## 103    OTU_42 0.002942776   95.65217                           <NA>
## 104    OTU_43 0.002659156   68.47826                           <NA>
## 105    OTU_44 0.004336299   48.91304                 Prasinophyceae
## 106    OTU_45 0.004710616  100.00000                    Dinophyceae
## 107    OTU_46 0.002661113   28.26087                 Prasinophyceae
## 108    OTU_47 0.003102203   73.91304                           <NA>
## 109    OTU_48 0.002643278   93.47826                           <NA>
## 110    OTU_49 0.002859908   98.91304                    Dinophyceae
## 111     OTU_5 0.004774996   79.34783                           <NA>
## 112    OTU_50 0.003929791   96.73913                           <NA>
## 113    OTU_51 0.003430193   88.04348                           <NA>
## 114    OTU_52 0.002800748   85.86957                           <NA>
## 115    OTU_53 0.002048633   39.13043                           <NA>
## 116    OTU_54 0.001941405   94.56522                           <NA>
## 117    OTU_55 0.003479348  100.00000                           <NA>
## 118    OTU_56 0.002291798   66.30435                           <NA>
## 119  OTU_5618 0.003471301   96.73913                           <NA>
## 120    OTU_57 0.001676273   94.56522                           <NA>
## 121    OTU_58 0.003224873   73.91304                           <NA>
## 122    OTU_59 0.001705635   77.17391                           <NA>
## 123     OTU_6 0.036886269  100.00000                           <NA>
## 124    OTU_61 0.001685625   81.52174                           <NA>
## 125    OTU_62 0.002199796   83.69565                           <NA>
## 126    OTU_63 0.001269983   56.52174       Prasinophyceae_clade-VII
## 127  OTU_6315 0.002216761   92.39130                           <NA>
## 128    OTU_64 0.003872153  100.00000                           <NA>
## 129    OTU_65 0.002889706   97.82609                           <NA>
## 130    OTU_66 0.002551928   97.82609                    Dinophyceae
## 131    OTU_67 0.001699328   98.91304                    Dinophyceae
## 132    OTU_68 0.002422733   96.73913                           <NA>
## 133    OTU_69 0.002504948   56.52174                           <NA>
## 134     OTU_7 0.008351567  100.00000                           <NA>
## 135    OTU_70 0.003651608   77.17391                           <NA>
## 136    OTU_71 0.002629576  100.00000                           <NA>
## 137    OTU_72 0.003060443   97.82609                           <NA>
## 138    OTU_73 0.002135416   73.91304                           <NA>
## 139    OTU_74 0.002421646   96.73913                           <NA>
## 140    OTU_75 0.002597386   97.82609                           <NA>
## 141    OTU_76 0.001797203   89.13043                           <NA>
## 142   OTU_767 0.001081845   32.60870                           <NA>
## 143    OTU_77 0.001420058   66.30435                    Dinophyceae
## 144    OTU_78 0.002143246   98.91304                    Dinophyceae
## 145  OTU_7889 0.002225026   81.52174                           <NA>
## 146     OTU_8 0.011224525   97.82609                           <NA>
## 147    OTU_80 0.002247428   70.65217                           <NA>
## 148    OTU_81 0.003464558   98.91304                           <NA>
## 149    OTU_82 0.002185223   91.30435                           <NA>
## 150    OTU_83 0.001569480   71.73913                           <NA>
## 151    OTU_84 0.002935163   73.91304                           <NA>
## 152    OTU_85 0.002136721   92.39130                           <NA>
## 153    OTU_87 0.001936185   92.39130               Dictyochophyceae
## 154    OTU_88 0.001396568   92.39130                           <NA>
## 155    OTU_89 0.002109316   78.26087                           <NA>
## 156  OTU_8924 0.001071188   72.82609                           <NA>
## 157     OTU_9 0.007700807   76.08696                           <NA>
## 158    OTU_90 0.001681710   97.82609                           <NA>
## 159    OTU_91 0.001151228   69.56522                           <NA>
## 160    OTU_92 0.001981425   73.91304                           <NA>
## 161    OTU_93 0.001196250   90.21739                           <NA>
## 162    OTU_94 0.001318268   94.56522                           <NA>
## 163    OTU_95 0.001564695   85.86957                           <NA>
## 164    OTU_96 0.002075603   66.30435                           <NA>
## 165    OTU_98 0.001601888   85.86957                           <NA>
## 166    OTU_99 0.001284338   70.65217                           <NA>
## [1] 166   4

Proportion of regionally abundant OTUs (%):

## [1] 0.429729

Cosmopolitan OTUs (relative abundance over 0.1% and occurrence in more than 80% of samples):

# Left join on `otu_names`: every row of the cosmopolitan-OTU table is kept
# and gains the columns of tb18_class_prov; OTUs without a match get NA.
otu_tb18_ss49975_cosmop_sorted_prov <- merge(
  x = otu_tb18_ss49975_cosmop_sorted_prov,
  y = tb18_class_prov,
  by = "otu_names",
  all.x = TRUE
)
##     otu_names mean_rabund perc_occur SILVA_plus_MAS_plus_BM_classif
## 1       OTU_1 0.008772647  100.00000                    Dinophyceae
## 2      OTU_10 0.004647541   91.30435                           <NA>
## 3     OTU_101 0.002379451   86.95652                           <NA>
## 4     OTU_102 0.001934880   80.43478                           <NA>
## 5     OTU_103 0.001510103   90.21739                           <NA>
## 6     OTU_104 0.001518803   80.43478                           <NA>
## 7     OTU_106 0.003904561   88.04348                           <NA>
## 8      OTU_11 0.007058094  100.00000                           <NA>
## 9     OTU_110 0.001633208   84.78261                    Dinophyceae
## 10    OTU_111 0.001425278   94.56522                    Dinophyceae
## 11    OTU_112 0.001392653   94.56522                    Dinophyceae
## 12   OTU_1136 0.001289775   98.91304                    Dinophyceae
## 13  OTU_11454 0.001849838  100.00000                           <NA>
## 14    OTU_116 0.001787633   83.69565                           <NA>
## 15    OTU_117 0.001023338   91.30435                           <NA>
## 16  OTU_11833 0.001151010   84.78261                           <NA>
## 17     OTU_12 0.008593862  100.00000                           <NA>
## 18    OTU_121 0.001687148   98.91304                    Dinophyceae
## 19    OTU_124 0.001649738   82.60870                           <NA>
## 20    OTU_125 0.001232355   96.73913                           <NA>
## 21    OTU_126 0.001682580   96.73913                           <NA>
## 22    OTU_128 0.001146443   84.78261                           <NA>
## 23     OTU_13 0.007238402   97.82609                           <NA>
## 24    OTU_130 0.001074668  100.00000                           <NA>
## 25    OTU_131 0.001257368   96.73913               Dictyochophyceae
## 26    OTU_135 0.001419623   89.13043                  Chrysophyceae
## 27     OTU_14 0.003708158   92.39130                           <NA>
## 28    OTU_144 0.001044218   92.39130                           <NA>
## 29    OTU_145 0.001232355   86.95652                           <NA>
## 30  OTU_14696 0.006883659   96.73913                           <NA>
## 31    OTU_165 0.001167975   80.43478                           <NA>
## 32     OTU_17 0.004036366   94.56522                           <NA>
## 33    OTU_177 0.001219740   92.39130                           <NA>
## 34     OTU_18 0.006814059  100.00000                    Dinophyceae
## 35   OTU_1842 0.001046610   95.65217                    Dinophyceae
## 36    OTU_188 0.001339365   95.65217                    Dinophyceae
## 37     OTU_19 0.004412859  100.00000                           <NA>
## 38    OTU_192 0.001192118   95.65217                           <NA>
## 39      OTU_2 0.035044261  100.00000                           <NA>
## 40     OTU_21 0.005322009   94.56522                           <NA>
## 41     OTU_22 0.004715619   91.30435                           <NA>
## 42    OTU_220 0.001024643   95.65217                           <NA>
## 43     OTU_23 0.012293756   96.73913                           <NA>
## 44    OTU_235 0.001173630   84.78261                           <NA>
## 45     OTU_24 0.006683777   98.91304                           <NA>
## 46    OTU_243 0.001317833   97.82609                           <NA>
## 47     OTU_25 0.010592035  100.00000                    Dinophyceae
## 48     OTU_26 0.005064271   96.73913                           <NA>
## 49     OTU_27 0.009988907  100.00000                           <NA>
## 50     OTU_28 0.004075081   95.65217                           <NA>
## 51     OTU_29 0.002512126   98.91304                           <NA>
## 52      OTU_3 0.005866629   85.86957                           <NA>
## 53     OTU_31 0.003338843   95.65217                           <NA>
## 54     OTU_33 0.003103291   98.91304                    Dinophyceae
## 55    OTU_338 0.001162973   95.65217                    Dinophyceae
## 56     OTU_35 0.002468843   86.95652                           <NA>
## 57  OTU_35799 0.002926463  100.00000                    Dinophyceae
## 58     OTU_36 0.003820388   89.13043                           <NA>
## 59     OTU_37 0.002746808   95.65217                           <NA>
## 60     OTU_38 0.003358636   92.39130                           <NA>
## 61     OTU_39 0.003165496   98.91304                    Dinophyceae
## 62   OTU_3988 0.001437240   80.43478                           <NA>
## 63      OTU_4 0.007469604  100.00000                    Dinophyceae
## 64     OTU_40 0.004207104  100.00000                    Dinophyceae
## 65     OTU_41 0.002774866   93.47826                           <NA>
## 66     OTU_42 0.002942776   95.65217                           <NA>
## 67     OTU_45 0.004710616  100.00000                    Dinophyceae
## 68     OTU_48 0.002643278   93.47826                           <NA>
## 69     OTU_49 0.002859908   98.91304                    Dinophyceae
## 70     OTU_50 0.003929791   96.73913                           <NA>
## 71     OTU_51 0.003430193   88.04348                           <NA>
## 72     OTU_52 0.002800748   85.86957                           <NA>
## 73     OTU_54 0.001941405   94.56522                           <NA>
## 74     OTU_55 0.003479348  100.00000                           <NA>
## 75   OTU_5618 0.003471301   96.73913                           <NA>
## 76     OTU_57 0.001676273   94.56522                           <NA>
## 77      OTU_6 0.036886269  100.00000                           <NA>
## 78     OTU_61 0.001685625   81.52174                           <NA>
## 79     OTU_62 0.002199796   83.69565                           <NA>
## 80   OTU_6315 0.002216761   92.39130                           <NA>
## 81     OTU_64 0.003872153  100.00000                           <NA>
## 82     OTU_65 0.002889706   97.82609                           <NA>
## 83     OTU_66 0.002551928   97.82609                    Dinophyceae
## 84     OTU_67 0.001699328   98.91304                    Dinophyceae
## 85     OTU_68 0.002422733   96.73913                           <NA>
## 86      OTU_7 0.008351567  100.00000                           <NA>
## 87     OTU_71 0.002629576  100.00000                           <NA>
## 88     OTU_72 0.003060443   97.82609                           <NA>
## 89     OTU_74 0.002421646   96.73913                           <NA>
## 90     OTU_75 0.002597386   97.82609                           <NA>
## 91     OTU_76 0.001797203   89.13043                           <NA>
## 92     OTU_78 0.002143246   98.91304                    Dinophyceae
## 93   OTU_7889 0.002225026   81.52174                           <NA>
## 94      OTU_8 0.011224525   97.82609                           <NA>
## 95     OTU_81 0.003464558   98.91304                           <NA>
## 96     OTU_82 0.002185223   91.30435                           <NA>
## 97     OTU_85 0.002136721   92.39130                           <NA>
## 98     OTU_87 0.001936185   92.39130               Dictyochophyceae
## 99     OTU_88 0.001396568   92.39130                           <NA>
## 100    OTU_90 0.001681710   97.82609                           <NA>
## 101    OTU_93 0.001196250   90.21739                           <NA>
## 102    OTU_94 0.001318268   94.56522                           <NA>
## 103    OTU_95 0.001564695   85.86957                           <NA>
## 104    OTU_98 0.001601888   85.86957                           <NA>
## [1] 104   4

Number and proportion (%) of cosmopolitan OTUs:

## [1] 104
## [1] 0.2692278

Number and proportion (%) of rare OTUs:

# Count the OTUs whose mean relative abundance is above zero but below 1e-05.
nrow(
  otu_tb18_ss49975_rabund_percoccur[
    with(
      otu_tb18_ss49975_rabund_percoccur,
      mean_rabund > 0 & mean_rabund < 1e-05
    ),
  ]
)
## [1] 32851
## [1] 85.04233

1.6) Taxonomic composition analysis

1.6.1) Normalized data

1.6.1.1) Absolute values

Let’s add the taxonomic classification by merging “tb18_tax_occur_ss49975_no_cero” with “tb18_tax”:

## [1] 38629   100
##   Row.names st001_MD28 st002_MD40 st003_MD52 st005_MD64
## 1     OTU_1        131        132        234        149
## 2    OTU_10          4         51        167         77
## 3   OTU_100         90        207         24         47
## 4  OTU_1000          1          0          8          0
## 5 OTU_10000          2          4          5          0
## [1] 38629    99
##           st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## OTU_1            131        132        234        149         240
## OTU_10             4         51        167         77          94
## OTU_100           90        207         24         47         340
## OTU_1000           1          0          8          0           0
## OTU_10000          2          4          5          0           0
## [1] 38629   100
##       st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## OTU_1        131        132        234        149         240
## OTU_2        131        268        736        854        2131
## OTU_3        334       1499       1574        433         286
## OTU_4         53         44         34        126          93
## OTU_5          0        109        276         41          98
## [1] 12040   100
# Create one table per taxonomic class and count in how many samples each
# class occurs.
#
# NOTE(review): every subset below prints as empty (0 rows), although the
# summary table further down reports OTUs for these classes (e.g. 10065 for
# Dinophyceae). Presumably `tb18_phototrophs$classif` is NULL or holds other
# labels at this point -- confirm the name of the classification column.

# Rows of `tb` whose `classif` value equals `class_name`; `which()` drops the
# rows for which the comparison is NA.
subset_by_class <- function(tb, class_name) {
  tb[which(tb$classif == class_name), ]
}

# Number of columns of `occur_tb` whose column sum is above zero.
# Summing the logical mask replaces `length(occur_tb[, mask])`: on a data
# frame that expression drops to a plain vector when exactly one column
# matches, so it returned the number of rows instead of 1.
count_samples_with_reads <- function(occur_tb) {
  sum(colSums(occur_tb) > 0)
}

# Columns kept for the per-sample occurrence tables.
sample_cols <- 1:92

Dinophyceae_tb <- subset_by_class(tb18_phototrophs, "Dinophyceae")
Dinophyceae_tb[1:5, 1:5]
##      st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## NA           NA         NA         NA         NA          NA
## NA.1         NA         NA         NA         NA          NA
## NA.2         NA         NA         NA         NA          NA
## NA.3         NA         NA         NA         NA          NA
## NA.4         NA         NA         NA         NA          NA
Dinophyceae_tb_occur <- Dinophyceae_tb[, sample_cols]
Dinophyceae_tb_occur[1:5, 1:5]
##      st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## NA           NA         NA         NA         NA          NA
## NA.1         NA         NA         NA         NA          NA
## NA.2         NA         NA         NA         NA          NA
## NA.3         NA         NA         NA         NA          NA
## NA.4         NA         NA         NA         NA          NA
dim(Dinophyceae_tb_occur)
## [1]  0 92
count_samples_with_reads(Dinophyceae_tb_occur)
## [1] 0
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])

# NOTE(review): this filters on "other_Prasinophyceae"; the summary table also
# lists a separate "Prasinophyceae" row -- confirm which label is intended.
Prasinophyceae_tb <- subset_by_class(tb18_phototrophs, "other_Prasinophyceae")
Prasinophyceae_tb_occur <- Prasinophyceae_tb[, sample_cols]
count_samples_with_reads(Prasinophyceae_tb_occur)
## [1] 0
Chrysophyceae_tb <- subset_by_class(tb18_phototrophs, "Chrysophyceae")
Chrysophyceae_tb_occur <- Chrysophyceae_tb[, sample_cols]
count_samples_with_reads(Chrysophyceae_tb_occur)
## [1] 0
Pelagophyceae_tb <- subset_by_class(tb18_phototrophs, "Pelagophyceae")
Pelagophyceae_tb_occur <- Pelagophyceae_tb[, sample_cols]
count_samples_with_reads(Pelagophyceae_tb_occur)
## [1] 0
Dictyochophyceae_tb <- subset_by_class(tb18_phototrophs, "Dictyochophyceae")
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[, sample_cols]
count_samples_with_reads(Dictyochophyceae_tb_occur)
## [1] 0
Cryptomonadales_tb <- subset_by_class(tb18_phototrophs, "Cryptophyceae")
Cryptomonadales_tb_occur <- Cryptomonadales_tb[, sample_cols]
count_samples_with_reads(Cryptomonadales_tb_occur)
## [1] 0
Bacillariophyta_tb <- subset_by_class(tb18_phototrophs, "Bacillariophyceae")
Bacillariophyta_tb_occur <- Bacillariophyta_tb[, sample_cols]
count_samples_with_reads(Bacillariophyta_tb_occur)
## [1] 0
Chlorarachniophyta_tb <- subset_by_class(tb18_phototrophs, "Chlorarachniophyceae")
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[, sample_cols]
count_samples_with_reads(Chlorarachniophyta_tb_occur)
## [1] 0
Bolidophyceae_tb <- subset_by_class(tb18_phototrophs, "Bolidophyceae")
Bolidophyceae_tb_occur <- Bolidophyceae_tb[, sample_cols]
count_samples_with_reads(Bolidophyceae_tb_occur)
## [1] 0
Pinguiochysidales_tb <- subset_by_class(tb18_phototrophs, "Pinguiophyceae")
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[, sample_cols]
count_samples_with_reads(Pinguiochysidales_tb_occur)
## [1] 0
Prymnesiophyceae_tb <- subset_by_class(tb18_phototrophs, "Prymnesiophyceae")
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[, sample_cols]
count_samples_with_reads(Prymnesiophyceae_tb_occur)
## [1] 0
Mamiellophyceae_tb <- subset_by_class(tb18_phototrophs, "Mamiellophyceae")
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[, sample_cols]
count_samples_with_reads(Mamiellophyceae_tb_occur)
## [1] 0
# NOTE(review): this filters on "Eustigmatophyceae", whereas the summary table
# lists the row as "Eustigmatales" -- confirm the label used in `classif`.
Eustigmatales_tb <- subset_by_class(tb18_phototrophs, "Eustigmatophyceae")
Eustigmatales_tb_occur <- Eustigmatales_tb[, sample_cols]
count_samples_with_reads(Eustigmatales_tb_occur)
## [1] 0
Chlorophyceae_tb <- subset_by_class(tb18_phototrophs, "Chlorophyceae")
Chlorophyceae_tb_occur <- Chlorophyceae_tb[, sample_cols]
count_samples_with_reads(Chlorophyceae_tb_occur)
## [1] 0
Ulvophyceae_tb <- subset_by_class(tb18_phototrophs, "Ulvophyceae")
Ulvophyceae_tb_occur <- Ulvophyceae_tb[, sample_cols]
count_samples_with_reads(Ulvophyceae_tb_occur)
## [1] 0
Raphydophyceae_tb <- subset_by_class(tb18_phototrophs, "Raphydophyceae")
Raphydophyceae_tb_occur <- Raphydophyceae_tb[, sample_cols]
count_samples_with_reads(Raphydophyceae_tb_occur)
## [1] 0
Trebouxiophyceae_tb <- subset_by_class(tb18_phototrophs, "Trebouxiophyceae")
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[, sample_cols]
count_samples_with_reads(Trebouxiophyceae_tb_occur)
## [1] 0
Phaeophyceae_tb <- subset_by_class(tb18_phototrophs, "Phaeophyceae")
Phaeophyceae_tb_occur <- Phaeophyceae_tb[, sample_cols]
count_samples_with_reads(Phaeophyceae_tb_occur)
## [1] 0
Phaeothamniophyceae_tb <- subset_by_class(tb18_phototrophs, "Phaeothamniophyceae")
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[, sample_cols]
count_samples_with_reads(Phaeothamniophyceae_tb_occur)
## [1] 0
Xanthophyceae_tb <- subset_by_class(tb18_phototrophs, "Xanthophyceae")
Xanthophyceae_tb_occur <- Xanthophyceae_tb[, sample_cols]
count_samples_with_reads(Xanthophyceae_tb_occur)
## [1] 0
Chlorodendrophyceae_tb <- subset_by_class(tb18_phototrophs, "Chlorodendrophyceae")
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[, sample_cols]
count_samples_with_reads(Chlorodendrophyceae_tb_occur)
## [1] 0
IncertaeSedis_Archaeplastida_tb <- subset_by_class(tb18_phototrophs, "IncertaeSedis_Archaeplastida")
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[, sample_cols]
count_samples_with_reads(IncertaeSedis_Archaeplastida_tb_occur)
## [1] 0
Nephroselmidophyceae_tb <- subset_by_class(tb18_phototrophs, "Nephroselmidophyceae")
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[, sample_cols]
count_samples_with_reads(Nephroselmidophyceae_tb_occur)
## [1] 0
Pavlovophyceae_tb <- subset_by_class(tb18_phototrophs, "Pavlovophyceae")
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[, sample_cols]
count_samples_with_reads(Pavlovophyceae_tb_occur)
## [1] 0
Rhodophyceae_tb <- subset_by_class(tb18_phototrophs, "Rhodophyceae")
Rhodophyceae_tb_occur <- Rhodophyceae_tb[, sample_cols]
count_samples_with_reads(Rhodophyceae_tb_occur)
## [1] 0
Rappemonads_tb <- subset_by_class(tb18_phototrophs, "Rappemonads")
Rappemonads_tb_occur <- Rappemonads_tb[, sample_cols]
count_samples_with_reads(Rappemonads_tb_occur)
## [1] 0
MOCH_1_tb <- subset_by_class(tb18_phototrophs, "MOCH-1")
MOCH_1_tb_occur <- MOCH_1_tb[, sample_cols]
count_samples_with_reads(MOCH_1_tb_occur)
## [1] 0
MOCH_2_tb <- subset_by_class(tb18_phototrophs, "MOCH-2")
MOCH_2_tb_occur <- MOCH_2_tb[, sample_cols]
count_samples_with_reads(MOCH_2_tb_occur)
## [1] 0
MOCH_5_tb <- subset_by_class(tb18_phototrophs, "MOCH-5")
MOCH_5_tb_occur <- MOCH_5_tb[, sample_cols]
count_samples_with_reads(MOCH_5_tb_occur)
## [1] 0
Prasinophyceae_clade_VII_tb <- subset_by_class(tb18_phototrophs, "Prasinophyceae_clade-VII")
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[, sample_cols]
count_samples_with_reads(Prasinophyceae_clade_VII_tb_occur)
## [1] 0
Prasinophyceae_clade_IX_tb <- subset_by_class(tb18_phototrophs, "Prasinophyceae_clade-IX")
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[, sample_cols]
count_samples_with_reads(Prasinophyceae_clade_IX_tb_occur)
## [1] 0
Pyramimonadaceae_tb <- subset_by_class(tb18_phototrophs, "Pyramimonadaceae")
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[, sample_cols]
count_samples_with_reads(Pyramimonadaceae_tb_occur)
## [1] 0
##                      reads_per_class OTUs_per_class
## Bacillariophyceae               3681            103
## Bolidophyceae                   2942             15
## Chlorarachniophyceae            3490             70
## Chrysophyceae                  80489            367
## Cryptophyceae                   9760             75
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                       666342          10065                 0
## Chrysophyceae                      80489            367                 0
## Prasinophyceae                     51443            194                 0
## Pelagophyceae                      46762            329                 0
## Dictyochophyceae                   39651            239                 0
## Prasinophyceae_clade-VII           24046            126                 0
## MOCH-2                             17211            136                 0
## Prasinophyceae_clade-IX            12147             81                 0
## Cryptophyceae                       9760             75                 0
## MOCH-1                              4429             72                 0
## Bacillariophyceae                   3681            103                 0
## Chlorarachniophyceae                3490             70                 0
## Bolidophyceae                       2942             15                 0
## MOCH-5                              2473             22                 0
## Pinguiophyceae                      1647             23                 0
## other_Prasinophyceae                 409              7                 0
## Prymnesiophyceae                     371             58                 0
## Mamiellophyceae                      337             35                 0
## Eustigmatales                        237              9                 0
## Raphydophyceae                       209              4                 0
## Trebouxiophyceae                     100              3                 0
## Pyramimonadaceae                      97              3                 0
## Ulvophyceae                           16              3                 0

1.6.1.2) Relative values

##   reads_per_class    OTUs_per_class samples_per_class 
##               100               100                 0
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                 68.816438068    83.60328931                 0
## Chrysophyceae                8.312497612     3.04842595                 0
## Prasinophyceae               5.312773356     1.61142952                 0
## Pelagophyceae                4.829343306     2.73278512                 0
## Dictyochophyceae             4.094955122     1.98521472                 0
## Prasinophyceae_clade-VII     2.483349496     1.04659855                 0
## MOCH-2                       1.777465199     1.12966193                 0
## Prasinophyceae_clade-IX      1.254480842     0.67281336                 0
## Cryptophyceae                1.007963532     0.62297533                 0
## MOCH-1                       0.457404762     0.59805632                 0
## Bacillariophyceae            0.380155098     0.85555279                 0
## Chlorarachniophyceae         0.360429582     0.58144364                 0
## Bolidophyceae                0.303834909     0.12459507                 0
## MOCH-5                       0.255398956     0.18273943                 0
## Pinguiophyceae               0.170093846     0.19104577                 0
## other_Prasinophyceae         0.042239455     0.05814436                 0
## Prymnesiophyceae             0.038315007     0.48176759                 0
## Mamiellophyceae              0.034803659     0.29072182                 0
## Eustigmatales                0.024476164     0.07475704                 0
## Raphydophyceae               0.021584465     0.03322535                 0
## Trebouxiophyceae             0.010327495     0.02491901                 0
## Pyramimonadaceae             0.010017670     0.02491901                 0
## Ulvophyceae                  0.001652399     0.02491901                 0



Reads per class vs. OTUs per class:



Reads per class vs. samples in which they occur:

1.6.2) Non-normalized data

## [1] 43966   100
##   Row.names st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100
## 1     OTU_1          579         5968         2609         1500
## 2    OTU_10           26            5            1           26
## 3   OTU_100           31          159           21            8
## 4  OTU_1000            0           95           26           21
## 5 OTU_10000            0            0            0            0
## [1] 43966    99
##           st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1              579         5968         2609         1500         815
## OTU_10              26            5            1           26         309
## OTU_100             31          159           21            8        1340
## OTU_1000             0           95           26           21           0
## OTU_10000            0            0            0            0           0
## [1] 43966   100
##       st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1          579         5968         2609         1500         815
## OTU_2         9289        16285         5140         1187        8207
## OTU_3           15           96           18            6        1061
## OTU_4          230          776          150          561         337
## OTU_5           68          513          287          604         396
## [1] 13720   100
##        st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1           579         5968         2609         1500         815
## OTU_4           230          776          150          561         337
## OTU_18         1042         4241         5755         3019        1129
## OTU_25         1353         6434         4523         4370        3060
## OTU_33          185          745         1559          258         615
##        st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1           579         5968         2609         1500         815
## OTU_4           230          776          150          561         337
## OTU_18         1042         4241         5755         3019        1129
## OTU_25         1353         6434         4523         4370        3060
## OTU_33          185          745         1559          258         615
## [1] 11479    92
## [1] 92
## [1] 92
## [1] 92
## [1] 92
## [1] 91
## [1] 92
## [1] 0
## [1] 0
## [1] 0
## [1] 90
## [1] 0
## [1] 81
## [1] 49
## [1] 31
## [1] 0
## [1] 5
## [1] 62
## [1] 4
##                     Group.1       x
## 7               Dinophyceae 1807655
## 4             Chrysophyceae  195117
## 13            Pelagophyceae  121123
## 15           Prasinophyceae  111654
## 6          Dictyochophyceae  104798
## 17 Prasinophyceae_clade-VII   56618
## 10                   MOCH-2   46120
## 16  Prasinophyceae_clade-IX   30109
## 5             Cryptophyceae   27275
## 9                    MOCH-1   13884
## 3      Chlorarachniophyceae   10322
## 1         Bacillariophyceae    9342
## 2             Bolidophyceae    7241
## 11                   MOCH-5    7165
## 14           Pinguiophyceae    5345
## 18         Prymnesiophyceae    1074
## 23     other_Prasinophyceae    1063
## 12          Mamiellophyceae     935
## 20           Raphydophyceae     664
## 8             Eustigmatales     573
## 19         Pyramimonadaceae     254
## 21         Trebouxiophyceae     193
## 22              Ulvophyceae      50
##                     Group.1     x
## 7               Dinophyceae 11479
## 4             Chrysophyceae   418
## 13            Pelagophyceae   386
## 6          Dictyochophyceae   265
## 15           Prasinophyceae   218
## 10                   MOCH-2   155
## 17 Prasinophyceae_clade-VII   133
## 1         Bacillariophyceae   111
## 5             Cryptophyceae    95
## 16  Prasinophyceae_clade-IX    86
## 9                    MOCH-1    83
## 3      Chlorarachniophyceae    80
## 18         Prymnesiophyceae    74
## 12          Mamiellophyceae    36
## 14           Pinguiophyceae    27
## 11                   MOCH-5    25
## 2             Bolidophyceae    16
## 23     other_Prasinophyceae    11
## 8             Eustigmatales     9
## 20           Raphydophyceae     4
## 19         Pyramimonadaceae     3
## 21         Trebouxiophyceae     3
## 22              Ulvophyceae     3
##                      reads_per_class OTUs_per_class
## Bacillariophyceae               9342            111
## Bolidophyceae                   7241             16
## Chlorarachniophyceae           10322             80
## Chrysophyceae                 195117            418
## Cryptophyceae                  27275             95
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                      1807655          11479                92
## Chrysophyceae                     195117            418                92
## Pelagophyceae                     121123            386                92
## Prasinophyceae                    111654            218               113
## Dictyochophyceae                  104798            265               116
## Prasinophyceae_clade-VII           56618            133               103
## MOCH-2                             46120            155               112
## Prasinophyceae_clade-IX            30109             86               112
## Cryptophyceae                      27275             95               108
## MOCH-1                             13884             83               111
## Chlorarachniophyceae               10322             80                97
## Bacillariophyceae                   9342            111                65
## Bolidophyceae                       7241             16                35
## MOCH-5                              7165             25                75
## Pinguiophyceae                      5345             27                75
## Prymnesiophyceae                    1074             74                50
## other_Prasinophyceae                1063             11                18
## Mamiellophyceae                      935             36                 1
## Raphydophyceae                       664              4                 1
## Eustigmatales                        573              9                 1
## Pyramimonadaceae                     254              3                 1
## Trebouxiophyceae                     193              3                 1
## Ulvophyceae                           50              3                 1
##   reads_per_class    OTUs_per_class samples_per_class 
##               100               100              1600
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                 70.650878184    83.66618076        100.000000
## Chrysophyceae                7.626005736     3.04664723        100.000000
## Pelagophyceae                4.734004176     2.81341108        100.000000
## Prasinophyceae               4.363915212     1.58892128        122.826087
## Dictyochophyceae             4.095953449     1.93148688        126.086957
## Prasinophyceae_clade-VII     2.212873265     0.96938776        111.956522
## MOCH-2                       1.802566586     1.12973761        121.739130
## Prasinophyceae_clade-IX      1.176788320     0.62682216        121.739130
## Cryptophyceae                1.066023496     0.69241983        117.391304
## MOCH-1                       0.542646021     0.60495627        120.652174
## Chlorarachniophyceae         0.403427847     0.58309038        105.434783
## Bacillariophyceae            0.365125261     0.80903790         70.652174
## Bolidophyceae                0.283009207     0.11661808         38.043478
## MOCH-5                       0.280038803     0.18221574         81.521739
## Pinguiophyceae               0.208905429     0.19679300         81.521739
## Prymnesiophyceae             0.041976507     0.53935860         54.347826
## other_Prasinophyceae         0.041546580     0.08017493         19.565217
## Mamiellophyceae              0.036543794     0.26239067          1.086957
## Raphydophyceae               0.025951956     0.02915452          1.086957
## Eustigmatales                0.022395287     0.06559767          1.086957
## Pyramimonadaceae             0.009927405     0.02186589          1.086957
## Trebouxiophyceae             0.007543264     0.02186589          1.086957
## Ulvophyceae                  0.001954214     0.02186589          1.086957

Reads per class vs. OTUs per class:



Reads OTUs per class vs. samples in which they occur: